"""
文档服务模块
提供文档向量数据库查询和外部文档API调用功能
"""

from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from urllib.parse import quote

import requests

from app.base.logger import setup_logger
from app.service.embedding_service import embedding_service
from app.service.milvus_service import MilvusService, MilvusConnectionConfig, MilvusFieldConfig
from app.utils.config_manager import config

# Use the new logging configuration: the module-level logger is created
# through the project's setup_logger helper.
logger = setup_logger("documentation_service")


@dataclass
class DocumentationSearchResult:
    """One hit returned by a documentation vector search.

    Attributes:
        document_id: Identifier of the matched document.
        content: Stored content of the matched document.
        document_type: Type tag of the document (for example "document-flow").
        score: Score of the hit as reported by the vector search.
        repo_id: Repository the document belongs to.
        branch_name: Branch the document belongs to.
    """

    document_id: str
    content: str
    document_type: str
    score: float
    repo_id: str
    branch_name: str

class DocumentationService:
    """Documentation service: vector search plus calls to the documentation API.

    The service wraps two back ends:

    * a Milvus collection holding documentation vectors, queried through
      ``MilvusService.hybrid_search``;
    * an external HTTP documentation API, reachable under ``doc_base_url``.

    ``initialize()`` must succeed before ``search_aggregated_documents`` can
    be used. The HTTP helpers only need the configuration read in
    ``__init__``.
    """

    # Milvus collection that stores the documentation vectors (fixed).
    COLLECTION_NAME = "documentation_vectors"
    # ``document_type`` value used to select aggregated documentation.
    AGGREGATED_DOCUMENT_TYPE = "document-flow"
    # Passed to hybrid_search as ``distance_threshold``; the original inline
    # comment describes it as dropping results whose similarity is below 0.7.
    DISTANCE_THRESHOLD = 0.7
    # Timeout (seconds) applied to every documentation API request.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Set the default state and derive the documentation API base URL."""
        self.milvus_service = None
        self.initialized = False
        self.error_message = None

        # Documentation API configuration.
        doc_config = config.documentation_config
        self.doc_host = doc_config.get("host", "localhost")
        self.doc_port = doc_config.get("port", 8080)
        self.doc_api_base = doc_config.get("api_base", "/documentation/api")
        self.doc_base_url = f"http://{self.doc_host}:{self.doc_port}{self.doc_api_base}"

    def initialize(self) -> bool:
        """Connect to the vector database and initialize the embedding service.

        Returns:
            bool: True when both back ends are ready, False otherwise. On
            failure the reason is available from ``get_error_message()``.
        """
        # Reset first so that a failed re-initialization can never leave a
        # stale ``initialized = True`` behind (the early ``return False``
        # paths below do not touch the flag themselves).
        self.initialized = False

        try:
            logger.info("初始化文档服务")

            milvus_config = config.milvus_config

            # Connection settings for the documentation vector collection.
            connection_config = MilvusConnectionConfig(
                uri=milvus_config.get("uri", ""),
                token=milvus_config.get("token"),
                collection_name=self.COLLECTION_NAME,
                dimension=milvus_config.get("dimension", 1536)
            )

            # Field layout of the documentation collection.
            field_config = MilvusFieldConfig(
                id_field="document_id",
                dense_vector_field="text_dense",
                sparse_vector_field="text_sparse",
                output_fields=[
                    "document_id", "content", "document_type",
                    "created_at", "updated_at", "repo_id", "branch_name"
                ]
            )

            self.milvus_service = MilvusService(
                connection_config=connection_config,
                field_config=field_config
            )

            if not self.milvus_service.connect():
                error_msg = self.milvus_service.get_error_message()
                logger.error(f"连接文档向量数据库失败: {error_msg}")
                self.error_message = f"文档向量数据库连接失败: {error_msg}"
                return False

            if not embedding_service.initialize():
                error_msg = embedding_service.get_error_message()
                logger.error(f"初始化嵌入服务失败: {error_msg}")
                self.error_message = f"嵌入服务初始化失败: {error_msg}"
                return False

            self.initialized = True
            self.error_message = None
            logger.info("文档服务初始化成功")
            return True

        except Exception as e:
            logger.exception(f"初始化文档服务失败: {str(e)}")
            self.error_message = f"初始化异常: {str(e)}"
            self.initialized = False
            return False

    @staticmethod
    def _escape_filter_value(value) -> str:
        """Escape a value for use inside a double-quoted filter string literal."""
        # Backslashes first, otherwise the backslash added for the quote
        # would itself be escaped again.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _normalize_document_id(doc_id):
        """Convert a purely decimal string ID to int; return anything else as is."""
        # ``isdecimal`` (not ``isdigit``) so characters such as "²" that
        # ``int()`` rejects stay strings instead of failing the whole batch.
        # Non-string IDs (e.g. ints) are passed through unchanged.
        if isinstance(doc_id, str) and doc_id.isdecimal():
            return int(doc_id)
        return doc_id

    @staticmethod
    def _hit_to_result(hit) -> "DocumentationSearchResult":
        """Build a DocumentationSearchResult from one search hit mapping."""
        entity = hit.get('entity', {})
        return DocumentationSearchResult(
            document_id=entity.get('document_id', ''),
            content=entity.get('content', ''),
            document_type=entity.get('document_type', ''),
            score=hit.get('distance', 0.0),
            repo_id=entity.get('repo_id', ''),
            branch_name=entity.get('branch_name', '')
        )

    def search_aggregated_documents(self,
                                    repo_id: str,
                                    branch_name: str,
                                    query_text: str,
                                    topk: int = 5) -> "List[DocumentationSearchResult]":
        """Search aggregated documentation (first-level flow documentation).

        Args:
            repo_id: Repository ID used in the filter expression.
            branch_name: Branch name used in the filter expression.
            query_text: Query text; it is embedded and also passed to the
                hybrid search.
            topk: Maximum number of results requested.

        Returns:
            List[DocumentationSearchResult]: Parsed hits. An empty list is
            returned when the service is not initialized, no vector could be
            produced, nothing matched, or any error occurred.
        """
        if not self.initialized:
            logger.error("文档服务未初始化，无法执行搜索")
            return []

        try:
            logger.info(f"搜索聚合说明书: repo_id={repo_id}, branch_name={branch_name}, query={query_text[:50]}...")

            # Embed the query text.
            query_vector = embedding_service.embed_text(query_text)
            if not query_vector:
                logger.error("无法生成查询向量")
                return []

            # Values are escaped so a quote or backslash in the repository or
            # branch name cannot break out of the string literal.
            filter_expr = (
                f'repo_id == "{self._escape_filter_value(repo_id)}"'
                f' and branch_name == "{self._escape_filter_value(branch_name)}"'
                f' and document_type == "{self.AGGREGATED_DOCUMENT_TYPE}"'
            )

            results = self.milvus_service.hybrid_search(
                query_text=query_text,
                query_dense_vector=query_vector,
                limit=topk,
                filter_expr=filter_expr,
                distance_threshold=self.DISTANCE_THRESHOLD,
            )

            if not results:
                logger.info("未找到匹配的聚合说明书")
                return []

            # Hits are read from the first inner list (the original comment
            # notes that Milvus returns a nested list).
            search_results = []
            for hit in results[0]:
                try:
                    search_results.append(self._hit_to_result(hit))
                except Exception as e:
                    # A single malformed hit must not discard the others.
                    logger.warning(f"解析搜索结果失败: {e}")

            logger.info(f"搜索完成，找到 {len(search_results)} 个聚合说明书")
            return search_results

        except Exception as e:
            logger.exception(f"搜索聚合说明书失败: {str(e)}")
            return []

    def _fetch_single(self, endpoint: str, document_id, doc_label: str) -> Optional[Dict[str, Any]]:
        """GET one document from ``/documentation/query/<endpoint>/<id>``.

        ``doc_label`` is the wording inserted into the log messages.
        Returns the response's ``data`` member, or None on any failure.
        """
        try:
            # Percent-encode the ID so characters such as "/", "?" or "#"
            # cannot alter the request path.
            encoded_id = quote(str(document_id), safe="")
            url = f"{self.doc_base_url}/documentation/query/{endpoint}/{encoded_id}"
            logger.info(f"调用{doc_label}API: {url}")

            response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()

            data = response.json()
            if data.get('success'):
                logger.info(f"成功获取{doc_label}内容: document_id={document_id}")
                return data.get('data')

            logger.error(f"API返回失败: {data.get('message', '未知错误')}")
            return None

        except requests.exceptions.RequestException as e:
            logger.error(f"调用{doc_label}API失败: {e}")
            return None
        except Exception as e:
            logger.exception(f"获取{doc_label}内容异常: {e}")
            return None

    def _fetch_batch(self, endpoint: str, document_ids, doc_label: str,
                     api_label: str) -> Optional[List[Dict[str, Any]]]:
        """POST a list of IDs to ``/documentation/query/<endpoint>/batch``.

        ``doc_label`` and ``api_label`` are the wordings inserted into the
        log messages; they are separate because the existing "API returned
        failure" messages name the two APIs differently.
        Returns the response's ``data`` member, or None on any failure.
        """
        # Nothing to look up: skip the network round trip.
        if not document_ids:
            return []

        try:
            url = f"{self.doc_base_url}/documentation/query/{endpoint}/batch"
            logger.info(f"批量调用{doc_label}API: {url}, 文档数量: {len(document_ids)}")

            # Request body: decimal string IDs are sent as integers.
            payload = {
                "documentIds": [self._normalize_document_id(doc_id) for doc_id in document_ids]
            }

            response = requests.post(
                url,
                json=payload,
                headers={'Content-Type': 'application/json'},
                timeout=self.REQUEST_TIMEOUT_SECONDS
            )
            response.raise_for_status()

            data = response.json()
            if data.get('success'):
                documents = data.get('data', [])
                logger.info(f"成功批量获取{doc_label}内容: 返回 {len(documents)} 个文档")
                return documents

            logger.error(f"批量{api_label}API返回失败: {data.get('message', '未知错误')}")
            return None

        except requests.exceptions.RequestException as e:
            logger.error(f"批量调用{doc_label}API失败: {e}")
            return None
        except Exception as e:
            logger.exception(f"批量获取{doc_label}内容异常: {e}")
            return None

    def get_documentation_content(self, document_id: str) -> Optional[Dict[str, Any]]:
        """Fetch one aggregated documentation entry from the external API.

        Args:
            document_id: Document ID.

        Returns:
            Dict[str, Any]: The ``data`` member of the API response, or None
            when the request fails or the API reports failure.
        """
        return self._fetch_single("aggregated", document_id, "文档")

    def get_documentation_content_batch(self, document_ids: List[str]) -> Optional[List[Dict[str, Any]]]:
        """Fetch several aggregated documentation entries in one API call.

        Args:
            document_ids: Document IDs. Decimal strings are sent as integers.

        Returns:
            List[Dict[str, Any]]: The ``data`` member of the API response; an
            empty list when ``document_ids`` is empty or None (no request is
            sent); None when the request fails or the API reports failure.
        """
        return self._fetch_batch("aggregated", document_ids, "文档", "")

    def get_process_documentation(self, document_id: str) -> Optional[Dict[str, Any]]:
        """Fetch one process documentation entry from the external API.

        Args:
            document_id: Document ID.

        Returns:
            Dict[str, Any]: The ``data`` member of the API response, or None
            when the request fails or the API reports failure.
        """
        return self._fetch_single("process", document_id, "流程文档")

    def get_process_documentation_batch(self, document_ids: List[str]) -> Optional[List[Dict[str, Any]]]:
        """Fetch several process documentation entries in one API call.

        Args:
            document_ids: Document IDs. Decimal strings are sent as integers.

        Returns:
            List[Dict[str, Any]]: The ``data`` member of the API response; an
            empty list when ``document_ids`` is empty or None (no request is
            sent); None when the request fails or the API reports failure.
        """
        return self._fetch_batch("process", document_ids, "流程文档", "流程")

    def close(self):
        """Disconnect from the vector database and mark the service closed.

        Returns:
            bool: True on success, False if disconnecting raised.
        """
        try:
            if self.milvus_service:
                self.milvus_service.disconnect()
            self.initialized = False
            logger.info("文档服务已关闭")
            return True
        except Exception as e:
            logger.exception(f"关闭文档服务失败: {str(e)}")
            return False

    def get_error_message(self):
        """Return the last recorded error message (None when there is none)."""
        return getattr(self, 'error_message', "未知错误")


# Create the module-level shared instance. Construction only reads the
# documentation API settings from config; initialize() must still be called
# before search_aggregated_documents() can return results.
documentation_service = DocumentationService()
